{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine Tuning a gpt-35-turbo model\n",
    "\n",
    "Source: [Azure OpenAI GPT 3.5 Turbo fine-tuning tutorial](https://learn.microsoft.com/azure/ai-services/openai/tutorials/fine-tune?tabs=python-new%2Ccommand-line?WT.mc_id=academic-105485-koreyst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "You'll need to create two files training_set.jsonl and validation_set.jsonl.\n",
    "\n",
    "Create the files in the same directory that you're running the Jupyter Notebook, and copy the contents of the following code blocks to the corresponding files: \n",
    "\n",
    "Now you need to run some preliminary checks on our training and validation files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of examples in training set: 10\n",
      "First example in training set:\n",
      "{'role': 'system', 'content': 'Clippy is a factual chatbot that is also sarcastic.'}\n",
      "{'role': 'user', 'content': 'Who discovered Antarctica?'}\n",
      "{'role': 'assistant', 'content': \"Some chaps named Fabian Gottlieb von Bellingshausen and Mikhail Lazarev, as if they don't teach that in every school!\"}\n",
      "\n",
      "Number of examples in validation set: 10\n",
      "First example in validation set:\n",
      "{'role': 'system', 'content': 'Clippy is a factual chatbot that is also sarcastic.'}\n",
      "{'role': 'user', 'content': \"What's the capital of Australia?\"}\n",
      "{'role': 'assistant', 'content': \"It's Canberra, not Sydney. Shocking, I know!\"}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "# Load the training set\n",
    "with open('training_set.jsonl', 'r', encoding='utf-8') as f:\n",
    "    training_dataset = [json.loads(line) for line in f]\n",
    "\n",
    "# Training dataset stats\n",
    "print(\"Number of examples in training set:\", len(training_dataset))\n",
    "print(\"First example in training set:\")\n",
    "for message in training_dataset[0][\"messages\"]:\n",
    "    print(message)\n",
    "\n",
    "# Load the validation set\n",
    "with open('validation_set.jsonl', 'r', encoding='utf-8') as f:\n",
    "    validation_dataset = [json.loads(line) for line in f]\n",
    "\n",
    "# Validation dataset stats\n",
    "print(\"\\nNumber of examples in validation set:\", len(validation_dataset))\n",
    "print(\"First example in validation set:\")\n",
    "for message in validation_dataset[0][\"messages\"]:\n",
    "    print(message)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now you can then run some additional code from OpenAI using the tiktoken library to validate the token counts. Individual examples need to remain under the gpt-35-turbo-0613 model's input token limit of 4096 tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing file: training_set.jsonl\n",
      "\n",
      "#### Distribution of total tokens:\n",
      "min / max: 47, 62\n",
      "mean / median: 52.1, 50.5\n",
      "p5 / p95: 47.9, 57.5\n",
      "\n",
      "#### Distribution of assistant tokens:\n",
      "min / max: 13, 30\n",
      "mean / median: 17.6, 15.5\n",
      "p5 / p95: 13.0, 21.9\n",
      "**************************************************\n",
      "Processing file: validation_set.jsonl\n",
      "\n",
      "#### Distribution of total tokens:\n",
      "min / max: 43, 65\n",
      "mean / median: 51.4, 49.0\n",
      "p5 / p95: 45.7, 56.9\n",
      "\n",
      "#### Distribution of assistant tokens:\n",
      "min / max: 8, 29\n",
      "mean / median: 15.9, 13.5\n",
      "p5 / p95: 11.6, 20.9\n",
      "**************************************************\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import tiktoken\n",
    "import numpy as np\n",
    "from collections import defaultdict\n",
    "\n",
    "encoding = tiktoken.get_encoding(\"cl100k_base\") # default encoding used by gpt-4, turbo, and text-embedding-ada-002 models\n",
    "\n",
    "def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n",
    "    num_tokens = 0\n",
    "    for message in messages:\n",
    "        num_tokens += tokens_per_message\n",
    "        for key, value in message.items():\n",
    "            num_tokens += len(encoding.encode(value))\n",
    "            if key == \"name\":\n",
    "                num_tokens += tokens_per_name\n",
    "    num_tokens += 3\n",
    "    return num_tokens\n",
    "\n",
    "def num_assistant_tokens_from_messages(messages):\n",
    "    num_tokens = 0\n",
    "    for message in messages:\n",
    "        if message[\"role\"] == \"assistant\":\n",
    "            num_tokens += len(encoding.encode(message[\"content\"]))\n",
    "    return num_tokens\n",
    "\n",
    "def print_distribution(values, name):\n",
    "    print(f\"\\n#### Distribution of {name}:\")\n",
    "    print(f\"min / max: {min(values)}, {max(values)}\")\n",
    "    print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n",
    "    print(f\"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}\")\n",
    "\n",
    "files = ['training_set.jsonl', 'validation_set.jsonl']\n",
    "\n",
    "for file in files:\n",
    "    print(f\"Processing file: {file}\")\n",
    "    with open(file, 'r', encoding='utf-8') as f:\n",
    "        dataset = [json.loads(line) for line in f]\n",
    "\n",
    "    total_tokens = []\n",
    "    assistant_tokens = []\n",
    "\n",
    "    for ex in dataset:\n",
    "        messages = ex.get(\"messages\", {})\n",
    "        total_tokens.append(num_tokens_from_messages(messages))\n",
    "        assistant_tokens.append(num_assistant_tokens_from_messages(messages))\n",
    "    \n",
    "    print_distribution(total_tokens, \"total tokens\")\n",
    "    print_distribution(assistant_tokens, \"assistant tokens\")\n",
    "    print('*' * 50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training file ID: file-b030618779b242ee9195620f6862d20c\n",
      "Validation file ID: file-65d859959d5d4663ac37b4981ac55480\n"
     ]
    }
   ],
   "source": [
    "# Upload fine-tuning files\n",
    "\n",
    "import os\n",
    "from openai import AzureOpenAI\n",
    "\n",
    "client = AzureOpenAI(\n",
    "  azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\"), \n",
    "  api_key=os.getenv(\"AZURE_OPENAI_KEY\"),  \n",
    "  api_version=\"2023-12-01-preview\"  # This API version or later is required to access fine-tuning for turbo/babbage-002/davinci-002\n",
    ")\n",
    "\n",
    "training_file_name = 'training_set.jsonl'\n",
    "validation_file_name = 'validation_set.jsonl'\n",
    "\n",
    "# Upload the training and validation dataset files to Azure OpenAI with the SDK.\n",
    "\n",
    "training_response = client.files.create(\n",
    "    file=open(training_file_name, \"rb\"), purpose=\"fine-tune\"\n",
    ")\n",
    "training_file_id = training_response.id\n",
    "\n",
    "validation_response = client.files.create(\n",
    "    file=open(validation_file_name, \"rb\"), purpose=\"fine-tune\"\n",
    ")\n",
    "validation_file_id = validation_response.id\n",
    "\n",
    "print(\"Training file ID:\", training_file_id)\n",
    "print(\"Validation file ID:\", validation_file_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Begin fine-tuning\n",
    "\n",
    "Now that the fine-tuning files have been successfully uploaded you can submit your fine-tuning training job:\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "BadRequestError",
     "evalue": "Error code: 400 - {'error': {'code': 'invalidPayload', 'message': 'The specified base model does not support fine-tuning.'}}",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mBadRequestError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfine_tuning\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjobs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtraining_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtraining_file_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[43mvalidation_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidation_file_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgpt-35-turbo\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Enter base model name. Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters. \u001b[39;49;00m\n\u001b[1;32m      5\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m      7\u001b[0m job_id \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mid\n\u001b[1;32m      9\u001b[0m \u001b[38;5;66;03m# You can use the job ID to monitor the status of the fine-tuning job.\u001b[39;00m\n\u001b[1;32m     10\u001b[0m \u001b[38;5;66;03m# The fine-tuning job will take some time to start and complete.\u001b[39;00m\n",
      "File \u001b[0;32m~/.python/current/lib/python3.10/site-packages/openai/resources/fine_tuning/jobs.py:109\u001b[0m, in \u001b[0;36mJobs.create\u001b[0;34m(self, model, training_file, hyperparameters, suffix, validation_file, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m     41\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate\u001b[39m(\n\u001b[1;32m     42\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m     43\u001b[0m     \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     54\u001b[0m     timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m NOT_GIVEN,\n\u001b[1;32m     55\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m FineTuningJob:\n\u001b[1;32m     56\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     57\u001b[0m \u001b[38;5;124;03m    Creates a fine-tuning job which begins the process of creating a new model from\u001b[39;00m\n\u001b[1;32m     58\u001b[0m \u001b[38;5;124;03m    a given dataset.\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    107\u001b[0m \u001b[38;5;124;03m      timeout: Override the client-level default timeout for this request, in seconds\u001b[39;00m\n\u001b[1;32m    108\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 109\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    110\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/fine_tuning/jobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    111\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    112\u001b[0m \u001b[43m            \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m    113\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    114\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtraining_file\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraining_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    115\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhyperparameters\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mhyperparameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    116\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuffix\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    117\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalidation_file\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidation_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    118\u001b[0m \u001b[43m            \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    119\u001b[0m \u001b[43m            \u001b[49m\u001b[43mjob_create_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mJobCreateParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    120\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    121\u001b[0m \u001b[43m        \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    122\u001b[0m \u001b[43m            \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m    123\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    124\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mFineTuningJob\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    125\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.python/current/lib/python3.10/site-packages/openai/_base_client.py:1200\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m   1186\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m   1187\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m   1188\u001b[0m     path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1195\u001b[0m     stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m   1196\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m   1197\u001b[0m     opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m   1198\u001b[0m         method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m   1199\u001b[0m     )\n\u001b[0;32m-> 1200\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n",
      "File \u001b[0;32m~/.python/current/lib/python3.10/site-packages/openai/_base_client.py:889\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m    880\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrequest\u001b[39m(\n\u001b[1;32m    881\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m    882\u001b[0m     cast_to: Type[ResponseT],\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    887\u001b[0m     stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m    888\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[0;32m--> 889\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    890\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    891\u001b[0m \u001b[43m        \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    892\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    893\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    894\u001b[0m \u001b[43m        \u001b[49m\u001b[43mremaining_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mremaining_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    895\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.python/current/lib/python3.10/site-packages/openai/_base_client.py:980\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m    977\u001b[0m         err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m    979\u001b[0m     log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 980\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    982\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m    983\u001b[0m     cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m    984\u001b[0m     options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    987\u001b[0m     stream_cls\u001b[38;5;241m=\u001b[39mstream_cls,\n\u001b[1;32m    988\u001b[0m )\n",
      "\u001b[0;31mBadRequestError\u001b[0m: Error code: 400 - {'error': {'code': 'invalidPayload', 'message': 'The specified base model does not support fine-tuning.'}}"
     ]
    }
   ],
   "source": [
    "response = client.fine_tuning.jobs.create(\n",
    "    training_file=training_file_id,\n",
    "    validation_file=validation_file_id,\n",
    "    model=\"gpt-35-turbo\", # Enter base model name. Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters. \n",
    ")\n",
    "\n",
    "job_id = response.id\n",
    "\n",
    "# You can use the job ID to monitor the status of the fine-tuning job.\n",
    "# The fine-tuning job will take some time to start and complete.\n",
    "\n",
    "print(\"Job ID:\", response.id)\n",
    "print(\"Status:\", response.id)\n",
    "print(response.model_dump_json(indent=2))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
